Carga de librerías a usar:
%%capture
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from holoviews import opts, dim
import holoviews as hv
from bokeh.plotting import figure, output_file, save, show
from bokeh.io import output_notebook ;
Carga de los datos. Utilizaremos dos archivos: uno con los viajes (del que solo leemos los ID de las estaciones de inicio y de fin) y otro con los datos de cada estación, entre ellos el nombre de su barrio.
Los datasets se encuentran en:
# Trip records: only the start and end station ids are read from the CSV.
dfa = pd.read_csv(
    '202003-citibike-tripdata.csv',
    usecols=['start station id', 'end station id'],
)

# Station metadata (neighborhood, borough, zip code, ...).
# NOTE(review): the preview below suggests it is indexed by `stationid` -- confirm.
dfstations = pd.read_parquet('202009-stations.parquet')

# Preview the first ten stations.
dfstations.head(10)
| stationid | neighborhood | boro | zipcode | station name | latitude | longitude |
|---|---|---|---|---|---|---|
| 3508 | Harlem | Manhattan | 10027 | St Nicholas Ave & Manhattan Ave | 40.809725 | -73.953149 |
| 3621 | Astoria | Queens | 11102 | 27 Ave & 9 St | 40.773983 | -73.930913 |
| 3492 | East Harlem | Manhattan | 10035 | E 118 St & Park Ave | 40.800539 | -73.941995 |
| 3946 | Harlem | Manhattan | 10040 | St Nicholas Ave & W 137 St | 40.818477 | -73.947568 |
| 3081 | NaN | Brooklyn | 11206 | Graham Ave & Grand St | 40.711863 | -73.944024 |
| 3738 | Lower East Side | Manhattan | 10002 | Delancey St & Eldridge St | 40.719383 | -73.991479 |
| 501 | Murray Hill | Manhattan | 10017 | FDR Drive & E 35 St | 40.744219 | -73.971212 |
| 3737 | Lower East Side | Manhattan | 10002 | Stanton St & Norfolk St | 40.720747 | -73.986274 |
| 4024 | Harlem | Manhattan | 10039 | Macombs Pl & W 152 St | 40.826406 | -73.937948 |
| 3100 | Greenpoint | Brooklyn | 11222 | Nassau Ave & Newell St | 40.724813 | -73.947526 |
Los ponemos juntos:
# Attach borough / neighborhood / zip code to each trip twice: first for the
# start station, then for the end station. The second merge finds the three
# columns already present, so pandas applies its default suffixes:
# `_x` for the start-station values and `_y` for the end-station values.
dfa = (
    dfa
    .merge(
        dfstations[['boro', 'neighborhood', 'zipcode']],
        how='left',
        left_on='start station id',
        right_on='stationid',
    )
    .merge(
        dfstations[['boro', 'neighborhood', 'zipcode']],
        how='left',
        left_on='end station id',
        right_on='stationid',
    )
)

# Preview the enriched trips.
dfa.head(10)
| | start station id | end station id | boro_x | neighborhood_x | zipcode_x | boro_y | neighborhood_y | zipcode_y |
|---|---|---|---|---|---|---|---|---|
| 0 | 224 | 3574 | Manhattan | Financial District | 10038 | Brooklyn | Prospect Heights | 11238 |
| 1 | 293 | 223 | Manhattan | NoHo | 10003 | Manhattan | West Village | 10011 |
| 2 | 379 | 515 | Manhattan | Chelsea | 10001 | Manhattan | Hell's Kitchen | 10036 |
| 3 | 3739 | 325 | Manhattan | West Village | 10014 | Manhattan | Gramercy | 10003 |
| 4 | 236 | 3124 | Manhattan | East Village | 10003 | Queens | Long Island City | 11101 |
| 5 | 471 | 497 | Brooklyn | Williamsburg | 11211 | Manhattan | Union Square | 10010 |
| 6 | 83 | 3579 | Brooklyn | Prospect Heights | 11208 | Brooklyn | NaN | 11216 |
| 7 | 3809 | 3372 | Manhattan | Midtown | NaN | Manhattan | Upper East Side | 10021 |
| 8 | 3463 | 174 | Manhattan | Union Square | 10003 | Manhattan | Kips Bay | 10010 |
| 9 | 406 | 421 | Brooklyn | Brooklyn Heights | 11201 | Brooklyn | Fort Greene | 11238 |
Recuento de viajes entre barrios:
# Count trips per (start neighborhood, end neighborhood) pair, keeping only
# trips that both start and end in Manhattan.
trips = (
    dfa.loc[
        (dfa['boro_x'] == 'Manhattan') & (dfa['boro_y'] == 'Manhattan'),
        ['neighborhood_x', 'neighborhood_y'],
    ]
    .value_counts()
)

# Preview the most frequent pairs.
trips.head()
neighborhood_x    neighborhood_y
Chelsea           Chelsea            19850
Upper West Side   Upper West Side    14716
East Village      East Village       14162
Lower East Side   Lower East Side     8943
Hell's Kitchen    Hell's Kitchen      7303
dtype: int64
Con el recuento de viajes podemos hacer los "enlaces":
# Turn the (start, end) index of `trips` into two ordinary columns and put the
# counts next to them, giving one row per link of the chord diagram.
links = pd.DataFrame.from_records(
    trips.index.tolist(),
    columns=['start', 'end'],
).assign(trips=trips.to_numpy())

# Preview the first ten links.
links.head(10)
| | start | end | trips |
|---|---|---|---|
| 0 | Chelsea | Chelsea | 19850 |
| 1 | Upper West Side | Upper West Side | 14716 |
| 2 | East Village | East Village | 14162 |
| 3 | Lower East Side | Lower East Side | 8943 |
| 4 | Hell's Kitchen | Hell's Kitchen | 7303 |
| 5 | East Harlem | East Harlem | 7277 |
| 6 | Yorkville | Yorkville | 5063 |
| 7 | East Village | Lower East Side | 5035 |
| 8 | Chelsea | West Village | 4987 |
| 9 | West Village | Chelsea | 4958 |
# Build the chart
hv.extension('bokeh')
hv.output(size=200)

# Only the first 50 rows of `links` are drawn.
# NOTE(review): this assumes `links` is still ordered by descending trip
# count, so these are the 50 busiest pairs -- confirm against the cell that
# builds it.
chord = hv.Chord(links[:50])
chord.opts(
    node_color='index',
    edge_color='start',
    # `labels` replaces the deprecated `label_index` plot option.
    labels='index',
    cmap='Category10',
    edge_cmap='Category10',
    title="AUTOR:\t\t\t\t\t\tDavid Fonfría\nGRÁFICO:\t\tChord Diagram",
)

# Save an interactive HTML file
renderer = hv.renderer('bokeh')
renderer.save(chord, 'chord_diagram')